************************************************************************************
** Replication code
** Authors: Meilin Ma, Fei Qin, Jayson Lusk
** Title: Retailer assortment under demand shock    
** Date: 05/26/2024
** Figure 1: Changes in US County Median Household Incomes and Average House Values from 2007 to 2010                       
************************************************************************************

clear all

*********************************************************
** prepare house value and median income datasets
*********************************************************

** house value data
** Convert monthly house value data into yearly level (take the average)
use "c_3603_yrnm.dta" , clear 
collapse (mean) hv_ad_r , by(statecountycode state county year)

rename statecountycode fips
rename hv_ad_r homevalue

drop if fips == ".."

destring fips , replace
replace homevalue = homevalue/10
** unit is 10k real dollars
save temp.dta , replace

** create 2007 and 2010 datasets separately
keep if year == 2007
drop year
save "homevalue_2007.dta" , replace

use temp.dta, clear
keep if year == 2010
drop year 
save "homevalue_2010.dta" , replace

*********************
** change over time
********************
clear all
use homevalue_2007.dta

**
rename homevalue hv2007

** 1800 matched
merge 1:1 state county fips using homevalue_2010.dta
keep if _merge==3
drop _merge

**
rename homevalue hv2010

**
gen hvdiff = hv2010-hv2007
sum hvdiff
save hvdiff.dta, replace

** income data
clear all 
use "c_3603_yrnm.dta" , clear 
collapse (mean) median_inc_r , by(statecountycode state county year)
rename statecountycode fips
rename median_inc_r medianincome
drop if fips == ".."

destring fips , replace
save temp.dta, replace

** create 2007 and 2010 datasets separately
keep if year == 2007
drop year
save "medianincome_2007.dta" , replace

use temp.dta, clear
keep if year == 2010
drop year 
save "medianincome_2010.dta" , replace
erase temp.dta

*********************
** change over time
********************
clear all
use medianincome_2007.dta

**
rename medianincome inc2007

** 1800 matched
merge 1:1 state county fips using medianincome_2010.dta
keep if _merge==3
drop _merge

**
rename medianincome inc2010

**
gen incdiff = inc2010-inc2007
sum incdiff
save incdiff.dta, replace


*********************************************************
** Create the map 
*********************************************************

clear all

* map data source:
* https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html

** create dataset containing geoid

spshape2dta "cb_2019_us_county_500k.shp", saving(usacounties) replace
use usacounties_shp.dta, clear
describe
list _ID _X _Y shape_order in 1/10, abbreviate(11)
use usacounties.dta, clear
describe
generate fips = real(GEOID)
list _ID GEOID fips NAME in 1/10, separator(0)
save usacounties.dta , replace

***********************************************

*** house value, real, 2010-2007
*cd "D:\Research\USPA\Empirics\Maps\cb_2018_us_county_500k"
use _ID _CX _CY GEOID fips using usacounties.dta , clear

**
merge 1:m fips using "hvdiff.dta" , keepusing(hvdiff state county)
drop if _merge == 1 
drop _merge

** total obs. 1800
describe

save "geo_hvdiff.dta" , replace

grmap, activate
drop if state == 2 | state == 15

** decrease in house value
*replace hvdiff = -hvdiff
sum hvdiff , de

**
spset, modify shpfile(usacounties_shp)
pctile pc1 = hvdiff, nq(5) genp(percent) 
ta pc1

**
grmap hvdiff , clnumber(7) fcolor(red*1.5 red*1 red*.5 red*.3 red*.1 green*0.3) ocolor(none ..) ///
clmethod(custom) clbreak(-34.7 -4.9 -2.3 -1.4 -0.8 0 3.3)

** unit is 10k real dollars
drop pc1 percent
graph export "hvdiff.jpg", replace



*** median income, real
use _ID _CX _CY GEOID fips using usacounties.dta , clear

**
merge 1:m fips using "incdiff.dta" , keepusing(incdiff state county)
drop if _merge == 1 
drop _merge

** total obs. 1800
describe

save "geo_incdiff.dta" , replace

**
grmap, activate
drop if state == 2 | state == 15

** decrease in house value
* replace incdiff = -incdiff
sum incdiff , de

**
spset, modify shpfile(usacounties_shp)
pctile pc1 = incdiff, nq(5) genp(percent) 
ta pc1

**
grmap incdiff , clnumber(7) fcolor(red*1.5 red*1 red*.5 red*.3 red*.1 green*0.3) ocolor(none ..) ///
clmethod(custom) clbreak(-16.9 -5.1 -3.5 -2.2 -0.5 0 9.2)

** unit is 1k real dollars
drop pc1 percent
graph export "incdiff.jpg", replace


